library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(rvest)
## Loading required package: xml2
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
## 
##     pluck
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(httr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
## 
##     config
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

The last official U.S. census was conducted in 2010. We therefore use the United States Census Bureau's 2017 population estimate for each of the years 2014, 2015, 2016 and 2017, as we do not expect the population to have changed significantly over that period.

# Census QuickFacts page for New York City and its five boroughs. The
# geographies are listed in the URL in the same order as the column names
# assigned below (NOTE(review): assumes the table columns follow the URL order).
url <- "https://www.census.gov/quickfacts/fact/table/newyorkcitynewyork,bronxcountybronxboroughnewyork,kingscountybrooklynboroughnewyork,newyorkcountymanhattanboroughnewyork,queenscountyqueensboroughnewyork,richmondcountystatenislandboroughnewyork/PST045217"

# Scrape the first <table> on the page, treating its first row as the header.
# as_tibble() is used instead of the deprecated as.tibble() alias.
nyc_population <- read_html(url) %>%
  html_nodes(css = "table") %>%
  .[[1]] %>%
  html_table(header = TRUE) %>%
  as_tibble() %>%
  janitor::clean_names()

# Fail early if the page layout changed: the positional renaming below needs
# at least seven columns.
stopifnot(ncol(nyc_population) >= 7)

names(nyc_population)[1:7] <- c(
  "estimate_date", "new_york_city", "bronx", "brooklyn",
  "manhattan", "queens", "staten_island"
)

# Reshape to long format: one row per (boro_nm, population) pair.
# The label column `estimate_date` is gathered together with the borough
# columns, so the result also holds a pseudo-row with
# boro_nm == "estimate_date"; its text label is recoded to "2017" so that the
# numeric conversion does not produce NA for it.
nyc_population <- nyc_population %>%
  gather(key = boro_nm, value = population, estimate_date:staten_island) %>%
  mutate(
    population = if_else(
      population == "Population estimates, July 1, 2017,  (V2017)",
      "2017",
      population
    ),
    # Strip thousands separators before converting to numeric.
    population = as.numeric(gsub(",", "", population))
  )
# Load the saved crime data and rewrite the "staten island" borough label with
# an underscore so it matches the boro_nm values used in nyc_population.
crime_df <- readRDS(file = "datasets/nyc_felony_misdemeanor.rds") %>%
  mutate(
    boro_nm = if_else(
      boro_nm == "staten island",
      "staten_island",
      boro_nm
    )
  )

Crime rates by category (felony, misdemeanor) and borough

# Count the records per borough, offence category and year.
grouped_df <- crime_df %>%
  group_by(boro_nm, law_cat_cd, year) %>%
  summarise(number = n())

# Attach the borough population and express the counts as a percentage of it.
# summarise() only peels off the last grouping variable, so the leftover
# grouping is dropped first; the join, the mutate and the plot then operate on
# a plain, ungrouped tibble.
full <- grouped_df %>%
  ungroup() %>%
  left_join(nyc_population, by = "boro_nm") %>%
  mutate(crime_rate = (number / population) * 100)

# One line per borough over the years, one facet per offence category.
x <- ggplot(full, aes(x = year, y = crime_rate, color = boro_nm)) +
  geom_line() +
  facet_grid(~law_cat_cd)

# Render the static ggplot as an interactive plotly widget.
ggplotly(x)